###############################################################################-
# Author: Pietryka
# Contact: matthew.pietryka@gmail.com
# Purpose: fit regressions for appendix Table S1
# Notes: 
###############################################################################-



#  1. Load Packages  =====================

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
library(estimatr)
library(stringr)
library(glue)
library(modelsummary)

#  2. Load Data =====================

stacked_df <- read_rds("data-files/stacked_df.rds")

source("Plot Preferences.r")




# fit regressions  -----------------------

fit_2016 <-  lm_robust(
  turnout_2016_f ~ turnout_2016_rm + parents_turnout_mean0_f,  
  clusters = hall_id,
  data = stacked_df
  )

fit_2018 <-  lm_robust(
  turnout_2018_f ~ turnout_2018_rm + parents_turnout_mean0_f,  
  clusters = hall_id,
  data = stacked_df
)

fit_2020 <-  lm_robust(
  turnout_2020_f ~ turnout_2020_rm + parents_turnout_mean0_f,  
  clusters = hall_id,
  data = stacked_df
)

fit_n_elections <-  lm_robust(
  n_turnout_f ~ n_turnout_rm + parents_turnout_mean0_f,  
  clusters = hall_id,
  data = stacked_df
)




# Format Table ------------------------------




cov_labs <- c(
  "turnout_2016_rm" = "Roommate voted in election (0 = No; 1 = Yes)",
  "turnout_2018_rm" = "Roommate voted in election (0 = No; 1 = Yes)",
  "turnout_2020_rm" = "Roommate voted in election (0 = No; 1 = Yes)",
  "n_turnout_rm" = "Roommate's number of elections voted (0-3)" ,
  "parents_turnout_mean0_f" =  "Parents' turnout, '08-14 (as a proportion)" ,
  "(Intercept)" = "Intercept"
)

# Define the goodness-of-fit stats to include
gof_df <- tribble(
  ~raw, ~clean, ~fmt,
  "nobs", "N", 0,
  "aic", "AIC", 0,
  "rmse", "RMSE", 2,
  "r.squared", "R^2", 3
)

mod_list <- list(
  `Turnout in 2016` = fit_2016, 
  `Turnout in 2018` = fit_2018, 
  `Turnout in 2020` = fit_2020, 
  `Number of elections voted` = fit_n_elections
  )



round_p <- function(p){
  p_round = round(p/2, 2) |> format(, nsmall = 2)
  ifelse(p_round < .01, "< .01", as.character(p_round))
}



display_mods <- function(list_of_mods, path = "default"){
  modelsummary(
    list_of_mods,
    coef_map = cov_labs,
    gof_map = gof_df,
    shape =  term ~ model + statistic,
    fmt = NULL,
    output  = path,
    estimate = "{round(estimate, 2)}",
    statistic = c(`s.e.` = "({round(std.error, 3)})", p = "{round_p(p.value/2)}"),
    notes = c(
      "Notes:", "Linear regression models fit in R with the `estimatr` package 
    (Blair et al 2025). Standard errors  clustered by first-year residence hall_id 
    with the CR2 variance estimator (Bell and McCaffrey 2002) and one-tailed 
    p-values.",
      "Observations are directed dyads.",
      "The outcomes for the first three models are indicators of whether the focal 
    student voted in the election. The outcome in the fourth model is the number 
    of elections in which the focal student voted."
    )
  )
}

display_mods(mod_list)



# save ----------------------


display_mods(mod_list, path = "Results/regressions.docx")
sessionInfo()




